In [1]:
# importing libraries

import datetime
import os
import sys
import random

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

from plotly import tools, subplots
import plotly.offline as py
from plotly.offline import plot
py.init_notebook_mode(connected = True)
import plotly.graph_objs as go
import plotly.express as px
import plotly.figure_factory as ff
import plotly.io as pio

from datetime import datetime        
In [2]:
# Loading Data
confirmed_global = pd.read_csv('time_series_covid19_confirmed_global.csv')
deaths_global = pd.read_csv('time_series_covid19_deaths_global.csv')
recovered_global = pd.read_csv('time_series_covid19_recovered_global.csv')

confirmed_us = pd.read_csv('time_series_covid19_deaths_US.csv')
deaths_us = pd.read_csv('time_series_covid19_deaths_US.csv')
In [ ]:
 
In [3]:
confirmed_global
Out[3]:
Province/State Country/Region Lat Long 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20 ... 5/19/20 5/20/20 5/21/20 5/22/20 5/23/20 5/24/20 5/25/20 5/26/20 5/27/20 5/28/20
0 NaN Afghanistan 33.000000 65.000000 0 0 0 0 0 0 ... 7653 8145 8676 9216 9998 10582 11173 11831 12456 13036
1 NaN Albania 41.153300 20.168300 0 0 0 0 0 0 ... 949 964 969 981 989 998 1004 1029 1050 1076
2 NaN Algeria 28.033900 1.659600 0 0 0 0 0 0 ... 7377 7542 7728 7918 8113 8306 8503 8697 8857 8997
3 NaN Andorra 42.506300 1.521800 0 0 0 0 0 0 ... 761 762 762 762 762 762 763 763 763 763
4 NaN Angola -11.202700 17.873900 0 0 0 0 0 0 ... 52 52 58 60 61 69 70 70 71 74
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
261 NaN Sao Tome and Principe 0.186360 6.613081 0 0 0 0 0 0 ... 251 251 251 251 251 251 299 441 443 458
262 NaN Yemen 15.552727 48.516388 0 0 0 0 0 0 ... 167 184 197 209 212 222 233 249 256 278
263 NaN Comoros -11.645500 43.333300 0 0 0 0 0 0 ... 11 34 34 78 78 87 87 87 87 87
264 NaN Tajikistan 38.861034 71.276093 0 0 0 0 0 0 ... 1936 2140 2350 2551 2738 2929 3100 3266 3424 3563
265 NaN Lesotho -29.609988 28.233608 0 0 0 0 0 0 ... 1 1 1 2 2 2 2 2 2 2

266 rows × 132 columns

In [4]:
# Renaming the columns
confirmed_global.rename(columns = {
    'Country/Region':'Country_Region',
    'Province/State': 'Province_State',
    'Longitude': 'Long',
    'Latitude': 'Lat'
}, inplace = True)

recovered_global.rename(columns = {
    'Country/Region':'Country_Region',
    'Province/State': 'Province_State',
    'Longitude': 'Long',
    'Latitude': 'Lat'
}, inplace = True)

deaths_global.rename(columns = {
    'Country/Region':'Country_Region',
    'Province/State': 'Province_State',
    'Longitude': 'Long',
    'Latitude': 'Lat'
}, inplace = True)
In [5]:
#calculatinng the NaN values
print(confirmed_global.isnull().sum())
print(recovered_global.isnull().sum())
print(deaths_global.isnull().sum())
Province_State    185
Country_Region      0
Lat                 0
Long                0
1/22/20             0
                 ... 
5/24/20             0
5/25/20             0
5/26/20             0
5/27/20             0
5/28/20             0
Length: 132, dtype: int64
Province_State    186
Country_Region      0
Lat                 0
Long                0
1/22/20             0
                 ... 
5/24/20             0
5/25/20             0
5/26/20             0
5/27/20             0
5/28/20             0
Length: 132, dtype: int64
Province_State    185
Country_Region      0
Lat                 0
Long                0
1/22/20             0
                 ... 
5/24/20             0
5/25/20             0
5/26/20             0
5/27/20             0
5/28/20             0
Length: 132, dtype: int64
In [6]:
# replacing Nan values to string values 'nan'
confirmed_global["Province_State"].fillna("nan", inplace = True)
recovered_global["Province_State"].fillna("nan", inplace = True)
deaths_global["Province_State"].fillna("nan", inplace = True)
In [7]:
confirmed_global.Country_Region.unique()
Out[7]:
array(['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola',
       'Antigua and Barbuda', 'Argentina', 'Armenia', 'Australia',
       'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh',
       'Barbados', 'Belarus', 'Belgium', 'Benin', 'Bhutan', 'Bolivia',
       'Bosnia and Herzegovina', 'Brazil', 'Brunei', 'Bulgaria',
       'Burkina Faso', 'Cabo Verde', 'Cambodia', 'Cameroon', 'Canada',
       'Central African Republic', 'Chad', 'Chile', 'China', 'Colombia',
       'Congo (Brazzaville)', 'Congo (Kinshasa)', 'Costa Rica',
       "Cote d'Ivoire", 'Croatia', 'Diamond Princess', 'Cuba', 'Cyprus',
       'Czechia', 'Denmark', 'Djibouti', 'Dominican Republic', 'Ecuador',
       'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia',
       'Eswatini', 'Ethiopia', 'Fiji', 'Finland', 'France', 'Gabon',
       'Gambia', 'Georgia', 'Germany', 'Ghana', 'Greece', 'Guatemala',
       'Guinea', 'Guyana', 'Haiti', 'Holy See', 'Honduras', 'Hungary',
       'Iceland', 'India', 'Indonesia', 'Iran', 'Iraq', 'Ireland',
       'Israel', 'Italy', 'Jamaica', 'Japan', 'Jordan', 'Kazakhstan',
       'Kenya', 'Korea, South', 'Kuwait', 'Kyrgyzstan', 'Latvia',
       'Lebanon', 'Liberia', 'Liechtenstein', 'Lithuania', 'Luxembourg',
       'Madagascar', 'Malaysia', 'Maldives', 'Malta', 'Mauritania',
       'Mauritius', 'Mexico', 'Moldova', 'Monaco', 'Mongolia',
       'Montenegro', 'Morocco', 'Namibia', 'Nepal', 'Netherlands',
       'New Zealand', 'Nicaragua', 'Niger', 'Nigeria', 'North Macedonia',
       'Norway', 'Oman', 'Pakistan', 'Panama', 'Papua New Guinea',
       'Paraguay', 'Peru', 'Philippines', 'Poland', 'Portugal', 'Qatar',
       'Romania', 'Russia', 'Rwanda', 'Saint Lucia',
       'Saint Vincent and the Grenadines', 'San Marino', 'Saudi Arabia',
       'Senegal', 'Serbia', 'Seychelles', 'Singapore', 'Slovakia',
       'Slovenia', 'Somalia', 'South Africa', 'Spain', 'Sri Lanka',
       'Sudan', 'Suriname', 'Sweden', 'Switzerland', 'Taiwan*',
       'Tanzania', 'Thailand', 'Togo', 'Trinidad and Tobago', 'Tunisia',
       'Turkey', 'Uganda', 'Ukraine', 'United Arab Emirates',
       'United Kingdom', 'Uruguay', 'US', 'Uzbekistan', 'Venezuela',
       'Vietnam', 'Zambia', 'Zimbabwe', 'Dominica', 'Grenada',
       'Mozambique', 'Syria', 'Timor-Leste', 'Belize', 'Laos', 'Libya',
       'West Bank and Gaza', 'Guinea-Bissau', 'Mali',
       'Saint Kitts and Nevis', 'Kosovo', 'Burma', 'MS Zaandam',
       'Botswana', 'Burundi', 'Sierra Leone', 'Malawi', 'South Sudan',
       'Western Sahara', 'Sao Tome and Principe', 'Yemen', 'Comoros',
       'Tajikistan', 'Lesotho'], dtype=object)
In [8]:
confirmed_global = confirmed_global[~confirmed_global["Province_State"].str.match('Diamond Princess')]
confirmed_global = confirmed_global[~confirmed_global["Country_Region"].str.match('Diamond Princess')]

deaths_global = deaths_global[~deaths_global["Province_State"].str.match('Diamond Princess')]
deaths_global = deaths_global[~deaths_global["Country_Region"].str.match('Diamond Princess')]

recovered_global = recovered_global[~recovered_global["Province_State"].str.match('Diamond Princess')]
recovered_global = recovered_global[~recovered_global["Country_Region"].str.match('Diamond Princess')]
In [9]:
# declaring function for converting Date formats
def convert_date(data):
    try:
        data.columns = list(data.columns[:4]) + [datetime.strptime(dt, "%m/%d/%y").date().strftime("%Y-%m-%d") for dt in data.columns[4:]]
    except:
        data.columns = list(data.columns[:4]) + [datetime.strptime(dt, "%m/%d/%Y").date().strftime("%Y-%m-%d") for dt in data.columns[4:]]
In [10]:
# calling function to change date formats
convert_date(confirmed_global)
convert_date(recovered_global)
convert_date(deaths_global)
In [11]:
confirmed_global
Out[11]:
Province_State Country_Region Lat Long 2020-01-22 2020-01-23 2020-01-24 2020-01-25 2020-01-26 2020-01-27 ... 2020-05-19 2020-05-20 2020-05-21 2020-05-22 2020-05-23 2020-05-24 2020-05-25 2020-05-26 2020-05-27 2020-05-28
0 nan Afghanistan 33.000000 65.000000 0 0 0 0 0 0 ... 7653 8145 8676 9216 9998 10582 11173 11831 12456 13036
1 nan Albania 41.153300 20.168300 0 0 0 0 0 0 ... 949 964 969 981 989 998 1004 1029 1050 1076
2 nan Algeria 28.033900 1.659600 0 0 0 0 0 0 ... 7377 7542 7728 7918 8113 8306 8503 8697 8857 8997
3 nan Andorra 42.506300 1.521800 0 0 0 0 0 0 ... 761 762 762 762 762 762 763 763 763 763
4 nan Angola -11.202700 17.873900 0 0 0 0 0 0 ... 52 52 58 60 61 69 70 70 71 74
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
261 nan Sao Tome and Principe 0.186360 6.613081 0 0 0 0 0 0 ... 251 251 251 251 251 251 299 441 443 458
262 nan Yemen 15.552727 48.516388 0 0 0 0 0 0 ... 167 184 197 209 212 222 233 249 256 278
263 nan Comoros -11.645500 43.333300 0 0 0 0 0 0 ... 11 34 34 78 78 87 87 87 87 87
264 nan Tajikistan 38.861034 71.276093 0 0 0 0 0 0 ... 1936 2140 2350 2551 2738 2929 3100 3266 3424 3563
265 nan Lesotho -29.609988 28.233608 0 0 0 0 0 0 ... 1 1 1 2 2 2 2 2 2 2

264 rows × 132 columns

In [12]:
confirmed_global_df = confirmed_global.melt(id_vars = ['Country_Region','Province_State','Lat','Long'],
                                            value_vars = confirmed_global.columns[4:],
                                            var_name = 'Date',
                                            value_name = 'Confirmed_Cases')
In [13]:
deaths_global_df = deaths_global.melt(id_vars = ['Country_Region','Province_State','Lat','Long'],
                                            value_vars = confirmed_global.columns[4:],
                                            var_name = 'Date',
                                            value_name = 'Deaths')
In [14]:
recovered_global_df = recovered_global.melt(id_vars = ['Country_Region','Province_State','Lat','Long'],
                                            value_vars = confirmed_global.columns[4:],
                                            var_name = 'Date',
                                            value_name = 'Recovered')
In [15]:
recovered_global_df
Out[15]:
Country_Region Province_State Lat Long Date Recovered
0 Afghanistan nan 33.000000 65.000000 2020-01-22 0
1 Albania nan 41.153300 20.168300 2020-01-22 0
2 Algeria nan 28.033900 1.659600 2020-01-22 0
3 Andorra nan 42.506300 1.521800 2020-01-22 0
4 Angola nan -11.202700 17.873900 2020-01-22 0
... ... ... ... ... ... ...
32251 Sao Tome and Principe nan 0.186360 6.613081 2020-05-28 68
32252 Yemen nan 15.552727 48.516388 2020-05-28 11
32253 Comoros nan -11.645500 43.333300 2020-05-28 24
32254 Tajikistan nan 38.861034 71.276093 2020-05-28 1674
32255 Lesotho nan -29.609988 28.233608 2020-05-28 1

32256 rows × 6 columns

In [16]:
train = confirmed_global_df.merge(deaths_global_df, on = ['Country_Region', 'Province_State','Date'])
train = train.merge(recovered_global_df, on = ['Country_Region', 'Province_State','Date'])
In [17]:
global_dataset = train.groupby('Date')['Confirmed_Cases','Recovered','Deaths'].sum().reset_index()
In [18]:
global_dataset['Daily_Rise'] = global_dataset['Confirmed_Cases'] - global_dataset['Confirmed_Cases'].shift(1)
global_dataset['Mortality_Rate'] = global_dataset['Deaths']/ global_dataset['Confirmed_Cases']
global_dataset
Out[18]:
Date Confirmed_Cases Recovered Deaths Daily_Rise Mortality_Rate
0 2020-01-22 555 28 17 NaN 0.030631
1 2020-01-23 654 30 18 99.0 0.027523
2 2020-01-24 941 36 26 287.0 0.027630
3 2020-01-25 1434 39 42 493.0 0.029289
4 2020-01-26 2117 52 56 683.0 0.026453
... ... ... ... ... ... ...
123 2020-05-24 5320795 2123914 338511 95638.0 0.063620
124 2020-05-25 5407230 2186436 339563 86435.0 0.062798
125 2020-05-26 5500824 2240953 343686 93594.0 0.062479
126 2020-05-27 5602089 2303189 348739 101265.0 0.062252
127 2020-05-28 5718258 2368348 353313 116169.0 0.061787

128 rows × 6 columns

In [19]:
global_dataset_df = pd.melt(global_dataset,
                           id_vars = ['Date'],
                           value_vars = ['Confirmed_Cases','Recovered', 'Mortality_Rate', 'Deaths', 'Daily_Rise'])
global_dataset_df
Out[19]:
Date variable value
0 2020-01-22 Confirmed_Cases 555.0
1 2020-01-23 Confirmed_Cases 654.0
2 2020-01-24 Confirmed_Cases 941.0
3 2020-01-25 Confirmed_Cases 1434.0
4 2020-01-26 Confirmed_Cases 2117.0
... ... ... ...
635 2020-05-24 Daily_Rise 95638.0
636 2020-05-25 Daily_Rise 86435.0
637 2020-05-26 Daily_Rise 93594.0
638 2020-05-27 Daily_Rise 101265.0
639 2020-05-28 Daily_Rise 116169.0

640 rows × 3 columns

In [20]:
visual_confirmed = global_dataset_df[global_dataset_df["variable"].str.match('Confirmed_Cases')]
visual_deaths = global_dataset_df[global_dataset_df["variable"].str.match('Deaths')]
visual_recovered = global_dataset_df[global_dataset_df["variable"].str.match('Recovered')]
visual_mortality = global_dataset_df[global_dataset_df["variable"].str.match('Mortality_Rate')]
visual_daily_rise = global_dataset_df[global_dataset_df["variable"].str.match('Daily_Rise')]
In [21]:
visual_confirmed
Out[21]:
Date variable value
0 2020-01-22 Confirmed_Cases 555.0
1 2020-01-23 Confirmed_Cases 654.0
2 2020-01-24 Confirmed_Cases 941.0
3 2020-01-25 Confirmed_Cases 1434.0
4 2020-01-26 Confirmed_Cases 2117.0
... ... ... ...
123 2020-05-24 Confirmed_Cases 5320795.0
124 2020-05-25 Confirmed_Cases 5407230.0
125 2020-05-26 Confirmed_Cases 5500824.0
126 2020-05-27 Confirmed_Cases 5602089.0
127 2020-05-28 Confirmed_Cases 5718258.0

128 rows × 3 columns

In [22]:
visual_deaths
Out[22]:
Date variable value
384 2020-01-22 Deaths 17.0
385 2020-01-23 Deaths 18.0
386 2020-01-24 Deaths 26.0
387 2020-01-25 Deaths 42.0
388 2020-01-26 Deaths 56.0
... ... ... ...
507 2020-05-24 Deaths 338511.0
508 2020-05-25 Deaths 339563.0
509 2020-05-26 Deaths 343686.0
510 2020-05-27 Deaths 348739.0
511 2020-05-28 Deaths 353313.0

128 rows × 3 columns

In [23]:
visual_recovered
Out[23]:
Date variable value
128 2020-01-22 Recovered 28.0
129 2020-01-23 Recovered 30.0
130 2020-01-24 Recovered 36.0
131 2020-01-25 Recovered 39.0
132 2020-01-26 Recovered 52.0
... ... ... ...
251 2020-05-24 Recovered 2123914.0
252 2020-05-25 Recovered 2186436.0
253 2020-05-26 Recovered 2240953.0
254 2020-05-27 Recovered 2303189.0
255 2020-05-28 Recovered 2368348.0

128 rows × 3 columns

In [24]:
visual_mortality
Out[24]:
Date variable value
256 2020-01-22 Mortality_Rate 0.030631
257 2020-01-23 Mortality_Rate 0.027523
258 2020-01-24 Mortality_Rate 0.027630
259 2020-01-25 Mortality_Rate 0.029289
260 2020-01-26 Mortality_Rate 0.026453
... ... ... ...
379 2020-05-24 Mortality_Rate 0.063620
380 2020-05-25 Mortality_Rate 0.062798
381 2020-05-26 Mortality_Rate 0.062479
382 2020-05-27 Mortality_Rate 0.062252
383 2020-05-28 Mortality_Rate 0.061787

128 rows × 3 columns

In [25]:
visual_daily_rise
Out[25]:
Date variable value
512 2020-01-22 Daily_Rise NaN
513 2020-01-23 Daily_Rise 99.0
514 2020-01-24 Daily_Rise 287.0
515 2020-01-25 Daily_Rise 493.0
516 2020-01-26 Daily_Rise 683.0
... ... ... ...
635 2020-05-24 Daily_Rise 95638.0
636 2020-05-25 Daily_Rise 86435.0
637 2020-05-26 Daily_Rise 93594.0
638 2020-05-27 Daily_Rise 101265.0
639 2020-05-28 Daily_Rise 116169.0

128 rows × 3 columns

In [26]:
fig = px.line(global_dataset_df,
             x = 'Date',
             y = 'value',
             color = 'variable',
             title = 'Global Confirmed/ Deaths/ REcovered/ cases with Mortality and Daily Rises')
fig.show()
In [27]:
fig = px.line(global_dataset_df,
             x = 'Date',
             y = 'value',
             color = 'variable',
             title = 'Global Confirmed/ Deaths/ REcovered/ cases with Mortality and Daily Rises',
             log_y = True)
fig.show()
In [28]:
fig = px.line(visual_confirmed,
             x = 'Date',
             y = 'value',
             color = 'variable',
             title = 'Confirmed cases over time(Globally)')
fig.show()
In [29]:
fig = px.line(visual_deaths,
             x = 'Date',
             y = 'value',
             color = 'variable',
             title = 'Deaths reported over time(Globally)')
fig.show()
In [30]:
fig = px.line(visual_mortality,
             x = 'Date',
             y = 'value',
             color = 'variable',
             title = 'Change in Mortality Rate over time')
fig.show()
In [31]:
fig = px.line(visual_daily_rise,
             x = 'Date',
             y = 'value',
             color = 'variable',
             title = 'Daily rise of the infected people(Globally)')
fig.show()
In [32]:
country_wise = train.groupby(['Country_Region','Province_State','Date'])['Confirmed_Cases','Recovered','Deaths'].sum().reset_index()
In [33]:
country_wise
Out[33]:
Country_Region Province_State Date Confirmed_Cases Recovered Deaths
0 Afghanistan nan 2020-01-22 0 0 0
1 Afghanistan nan 2020-01-23 0 0 0
2 Afghanistan nan 2020-01-24 0 0 0
3 Afghanistan nan 2020-01-25 0 0 0
4 Afghanistan nan 2020-01-26 0 0 0
... ... ... ... ... ... ...
32123 Zimbabwe nan 2020-05-24 56 25 4
32124 Zimbabwe nan 2020-05-25 56 25 4
32125 Zimbabwe nan 2020-05-26 56 25 4
32126 Zimbabwe nan 2020-05-27 132 25 4
32127 Zimbabwe nan 2020-05-28 149 28 4

32128 rows × 6 columns

In [34]:
country_wise = pd.melt(country_wise,
                           id_vars = ['Date','Country_Region','Province_State'],
                           value_vars = ['Confirmed_Cases','Recovered', 'Deaths'])
In [35]:
country_wise
Out[35]:
Date Country_Region Province_State variable value
0 2020-01-22 Afghanistan nan Confirmed_Cases 0
1 2020-01-23 Afghanistan nan Confirmed_Cases 0
2 2020-01-24 Afghanistan nan Confirmed_Cases 0
3 2020-01-25 Afghanistan nan Confirmed_Cases 0
4 2020-01-26 Afghanistan nan Confirmed_Cases 0
... ... ... ... ... ...
96379 2020-05-24 Zimbabwe nan Deaths 4
96380 2020-05-25 Zimbabwe nan Deaths 4
96381 2020-05-26 Zimbabwe nan Deaths 4
96382 2020-05-27 Zimbabwe nan Deaths 4
96383 2020-05-28 Zimbabwe nan Deaths 4

96384 rows × 5 columns

In [36]:
country_wise_visual_confirmed = country_wise[country_wise["variable"].str.match('Confirmed_Cases')]
country_wise_visual_deaths = country_wise[country_wise["variable"].str.match('Deaths')]
country_wise_visual_recovered = country_wise[country_wise["variable"].str.match('Recovered')]
In [37]:
country_wise
Out[37]:
Date Country_Region Province_State variable value
0 2020-01-22 Afghanistan nan Confirmed_Cases 0
1 2020-01-23 Afghanistan nan Confirmed_Cases 0
2 2020-01-24 Afghanistan nan Confirmed_Cases 0
3 2020-01-25 Afghanistan nan Confirmed_Cases 0
4 2020-01-26 Afghanistan nan Confirmed_Cases 0
... ... ... ... ... ...
96379 2020-05-24 Zimbabwe nan Deaths 4
96380 2020-05-25 Zimbabwe nan Deaths 4
96381 2020-05-26 Zimbabwe nan Deaths 4
96382 2020-05-27 Zimbabwe nan Deaths 4
96383 2020-05-28 Zimbabwe nan Deaths 4

96384 rows × 5 columns

In [38]:
fig = px.line(country_wise_visual_confirmed,
             x = 'Date',
             y = 'value',
             color = 'Country_Region',
             title = 'Confirmed cases')
fig.show()
In [39]:
fig = px.line(country_wise_visual_deaths,
             x = 'Date',
             y = 'value',
             color = 'Country_Region',
             title = 'People that are missed')
fig.show()
In [40]:
fig = px.line(country_wise_visual_recovered,
             x = 'Date',
             y = 'value',
             color = 'Country_Region',
             title = 'Recovered cases')
fig.show()
In [41]:
current_date = country_wise_visual_confirmed['Date'][country_wise_visual_confirmed.index[-1]]
In [42]:
# finding out total deaths, confirmed cases and recovered cases 
country_wise_total_confirmed = country_wise_visual_confirmed[country_wise_visual_confirmed["Date"].str.match(current_date)]
country_wise_total_deaths = country_wise_visual_deaths[country_wise_visual_deaths["Date"].str.match(current_date)]
country_wise_total_recovered = country_wise_visual_recovered[country_wise_visual_recovered["Date"].str.match(current_date)]
In [43]:
country_wise_total_deaths
Out[43]:
Date Country_Region Province_State variable value
64383 2020-05-28 Afghanistan nan Deaths 235
64511 2020-05-28 Albania nan Deaths 33
64639 2020-05-28 Algeria nan Deaths 630
64767 2020-05-28 Andorra nan Deaths 51
64895 2020-05-28 Angola nan Deaths 4
... ... ... ... ... ...
95871 2020-05-28 West Bank and Gaza nan Deaths 3
95999 2020-05-28 Western Sahara nan Deaths 1
96127 2020-05-28 Yemen nan Deaths 57
96255 2020-05-28 Zambia nan Deaths 7
96383 2020-05-28 Zimbabwe nan Deaths 4

251 rows × 5 columns

In [44]:
country_wise_total_recovered
Out[44]:
Date Country_Region Province_State variable value
32255 2020-05-28 Afghanistan nan Recovered 1209
32383 2020-05-28 Albania nan Recovered 823
32511 2020-05-28 Algeria nan Recovered 5277
32639 2020-05-28 Andorra nan Recovered 681
32767 2020-05-28 Angola nan Recovered 18
... ... ... ... ... ...
63743 2020-05-28 West Bank and Gaza nan Recovered 368
63871 2020-05-28 Western Sahara nan Recovered 6
63999 2020-05-28 Yemen nan Recovered 11
64127 2020-05-28 Zambia nan Recovered 779
64255 2020-05-28 Zimbabwe nan Recovered 28

251 rows × 5 columns

In [45]:
country_wise_total_confirmed
Out[45]:
Date Country_Region Province_State variable value
127 2020-05-28 Afghanistan nan Confirmed_Cases 13036
255 2020-05-28 Albania nan Confirmed_Cases 1076
383 2020-05-28 Algeria nan Confirmed_Cases 8997
511 2020-05-28 Andorra nan Confirmed_Cases 763
639 2020-05-28 Angola nan Confirmed_Cases 74
... ... ... ... ... ...
31615 2020-05-28 West Bank and Gaza nan Confirmed_Cases 446
31743 2020-05-28 Western Sahara nan Confirmed_Cases 9
31871 2020-05-28 Yemen nan Confirmed_Cases 278
31999 2020-05-28 Zambia nan Confirmed_Cases 1057
32127 2020-05-28 Zimbabwe nan Confirmed_Cases 149

251 rows × 5 columns

In [46]:
worst_hit_countries_30 = country_wise_total_confirmed.sort_values(by = 'value', ascending = False).head(30)

plt.figure(figsize = (12, 10))
sns.barplot(data = worst_hit_countries_30, y = 'Country_Region', x = 'value', hue = 'Country_Region', dodge = False)
plt.legend(loc = 'lower right')
plt.xlabel('Total Confirmed reported')
plt.ylabel('Countries')
plt.title('Worst 30 countries hit by  navirus(Confirmed)')
plt.show()
In [47]:
worst_hit_countries_30 = country_wise_total_deaths.sort_values(by = 'value', ascending = False).head(30)

plt.figure(figsize = (12, 10))
sns.barplot(data = worst_hit_countries_30, y = 'Country_Region', x = 'value', hue = 'Country_Region', dodge = False)
plt.legend(loc = 'lower right')
plt.xlabel('Most number of people missed')
plt.ylabel('Countries')
plt.title('Worst 30 countries hit by  navirus(Deaths)')
plt.show()
In [48]:
recovering_countries_30 = country_wise_total_recovered.sort_values(by = 'value', ascending = False).head(30)

fig = px.bar(recovering_countries_30,
             x='value', y='Country_Region', color='Country_Region', barmode='relative',
             title=f'Most Recovered', text='value', height=1500, width = 950, orientation='h')
fig.show()
In [49]:
locations = confirmed_global[confirmed_global["Province_State"].str.match('nan')].reset_index()
locations = locations[['Country_Region','Lat','Long']]
locations
Out[49]:
Country_Region Lat Long
0 Afghanistan 33.000000 65.000000
1 Albania 41.153300 20.168300
2 Algeria 28.033900 1.659600
3 Andorra 42.506300 1.521800
4 Angola -11.202700 17.873900
... ... ... ...
179 Sao Tome and Principe 0.186360 6.613081
180 Yemen 15.552727 48.516388
181 Comoros -11.645500 43.333300
182 Tajikistan 38.861034 71.276093
183 Lesotho -29.609988 28.233608

184 rows × 3 columns

In [50]:
country_wise_total_confirmed = country_wise_total_confirmed.groupby(['Country_Region','Date'])[['value']].sum().reset_index()
country_wise_total_deaths = country_wise_total_deaths.groupby(['Country_Region','Date'])[['value']].sum().reset_index()
country_wise_total_recovered = country_wise_total_recovered.groupby(['Country_Region','Date'])[['value']].sum().reset_index()
In [51]:
total_countries = country_wise_total_confirmed.merge(country_wise_total_deaths, on = ['Country_Region','Date'])
total_countries = total_countries.merge(country_wise_total_recovered, on = ['Country_Region','Date'])
In [52]:
total_countries
Out[52]:
Country_Region Date value_x value_y value
0 Afghanistan 2020-05-28 13036 235 1209
1 Albania 2020-05-28 1076 33 823
2 Algeria 2020-05-28 8997 630 5277
3 Andorra 2020-05-28 763 51 681
4 Angola 2020-05-28 74 4 18
... ... ... ... ... ...
181 West Bank and Gaza 2020-05-28 446 3 368
182 Western Sahara 2020-05-28 9 1 6
183 Yemen 2020-05-28 278 57 11
184 Zambia 2020-05-28 1057 7 779
185 Zimbabwe 2020-05-28 149 4 28

186 rows × 5 columns

In [53]:
country_wise_total_confirmed
Out[53]:
Country_Region Date value
0 Afghanistan 2020-05-28 13036
1 Albania 2020-05-28 1076
2 Algeria 2020-05-28 8997
3 Andorra 2020-05-28 763
4 Angola 2020-05-28 74
... ... ... ...
181 West Bank and Gaza 2020-05-28 446
182 Western Sahara 2020-05-28 9
183 Yemen 2020-05-28 278
184 Zambia 2020-05-28 1057
185 Zimbabwe 2020-05-28 149

186 rows × 3 columns

In [54]:
total_countries.rename(columns = {
    'value_x': 'Confirmed_Cases',
    'value_y': 'Deaths',
    'value' : 'Recovered'}, inplace = True)

#total_countries.drop(['variable_x','variable_y','variable'], axis = 1, inplace = True)
In [55]:
fig = px.choropleth(total_countries,
                   locations = 'Country_Region',
                   locationmode='country names',
                   color = 'Confirmed_Cases',
                   hover_name = 'Country_Region',
                   color_continuous_scale="Viridis",
                   scope = 'world',
                   hover_data = ['Confirmed_Cases','Deaths','Recovered'],
                   title='Covid19 cases worldwide')

fig.update_geos(fitbounds="locations", visible=True)
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})

fig.show()
In [56]:
fig = px.choropleth(total_countries,
                   locations = 'Country_Region',
                   locationmode='country names',
                   color = 'Confirmed_Cases',
                   hover_name = 'Country_Region',
                   color_continuous_scale="Viridis",
                   scope = 'north america',
                   hover_data = ['Confirmed_Cases','Deaths','Recovered'],
                   title='Covid19 cases worldwide')
fig.update_geos(fitbounds="locations", visible=False)
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()
In [57]:
country_names = []
for i in (total_countries["Country_Region"]):
    country_names.append(i)
In [58]:
country_names.remove('Holy See')
country_names.remove('Kosovo')
country_names.remove('MS Zaandam')
country_names.remove('Timor-Leste')
country_names.remove('US')
country_names.remove('West Bank and Gaza')
country_names.remove("Western Sahara")
In [59]:
country_names = [sub.replace('Burma', 'Myanmar') for sub in country_names] 
country_names = [sub.replace('Congo (Brazzaville)', 'Congo') for sub in country_names] 
country_names = [sub.replace('Congo (Kinshasa)', 'Democratic Republic of the Congo') for sub in country_names] 
country_names = [sub.replace('Cote d\'Ivoire', 'Côte d\'Ivoire') for sub in country_names] 
country_names = [sub.replace('Korea, South', 'South Korea') for sub in country_names] 
country_names = [sub.replace('Taiwan*', 'Taiwan') for sub in country_names]
In [60]:
from pycountry_convert import country_alpha2_to_continent_code, country_name_to_country_alpha2

continents = {
    'NA': 'North America',
    'SA': 'South America', 
    'AS': 'Asia',
    'OC': 'Australia',
    'AF': 'Africa',
    'EU': 'Europe'
}
y = [continents[country_alpha2_to_continent_code(country_name_to_country_alpha2(country))] for country in country_names]
In [61]:
#continent and countries
continents_country = pd.DataFrame(list(zip(country_names, y)), 
               columns =['Country_Region', 'Continent'])
In [62]:
continents_country['Country_Region'] = continents_country['Country_Region'].replace({'Congo':'Congo (Brazzaville)',
                                                                                  'Democratic Republic of the Congo':'Congo (Kinshasa)',
                                                                                  'Côte d\'Ivoire': 'Cote d\'Ivoire',
                                                                                  'South Korea': 'Korea, South',
                                                                                  'Myanmar': 'Burma',
                                                                                  'Taiwan': 'Taiwan*'})
In [63]:
total_countries_and_cont = total_countries.merge(continents_country, on = ['Country_Region'])
In [64]:
african_countries  = total_countries_and_cont[total_countries_and_cont["Continent"].str.match('Africa')].reset_index()
asian_countries  = total_countries_and_cont[total_countries_and_cont["Continent"].str.match('Asia')].reset_index()
north_american_countries  = total_countries_and_cont[total_countries_and_cont["Continent"].str.match('North America')].reset_index()
european_countries  = total_countries_and_cont[total_countries_and_cont["Continent"].str.match('Europe')].reset_index()
australian_countries  = total_countries_and_cont[total_countries_and_cont["Continent"].str.match('Australia')].reset_index()
south_american_countries  = total_countries_and_cont[total_countries_and_cont["Continent"].str.match('South America')].reset_index()


fig = px.choropleth(african_countries,
                   locations = 'Country_Region',
                   locationmode='country names',
                   color = 'Confirmed_Cases',
                   hover_name = 'Country_Region',
                   color_continuous_scale="Viridis",
                   scope = 'africa',
                   hover_data = ['Confirmed_Cases','Deaths','Recovered'],
                   title='Covid19 cases worldwide')
fig.update_geos(fitbounds="locations", visible=False)
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()
In [65]:
fig = px.choropleth(asian_countries,
                   locations = 'Country_Region',
                   locationmode='country names',
                   color = 'Confirmed_Cases',
                   hover_name = 'Country_Region',
                   color_continuous_scale="Viridis",
                   scope = 'asia',
                   hover_data = ['Confirmed_Cases','Deaths','Recovered'],
                   title='Covid19 cases worldwide')
fig.update_geos(fitbounds="locations", visible=False)
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()
In [66]:
fig = px.choropleth(south_american_countries,
                   locations = 'Country_Region',
                   locationmode='country names',
                   color = 'Confirmed_Cases',
                   hover_name = 'Country_Region',
                   color_continuous_scale="Viridis",
                   scope = 'south america',
                   hover_data = ['Confirmed_Cases','Deaths','Recovered'],
                   title='Covid19 cases worldwide')
fig.update_geos(fitbounds="locations", visible=False)
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()
In [67]:
fig = px.choropleth(australian_countries,
                   locations = 'Country_Region',
                   locationmode='country names',
                   color = 'Confirmed_Cases',
                   hover_name = 'Country_Region',
                   color_continuous_scale="Viridis",
                   hover_data = ['Confirmed_Cases','Deaths','Recovered'],
                   title='Covid19 cases worldwide')
fig.update_geos(fitbounds="locations", visible=False)
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()
In [68]:
fig = px.choropleth(european_countries,
                   locations = 'Country_Region',
                   locationmode='country names',
                   color = 'Confirmed_Cases',
                   hover_name = 'Country_Region',
                   color_continuous_scale="Viridis",
                   scope = 'europe',
                   hover_data = ['Confirmed_Cases','Deaths','Recovered'],
                   title='Covid19 cases worldwide')
fig.update_geos(fitbounds="locations", visible=False)
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()
In [ ]:
 
In [69]:
confirmed_us.drop(['UID','iso2','iso3','Admin2','Country_Region','code3','Combined_Key','Population'],axis = 1, inplace = True)
In [70]:
confirmed_us
Out[70]:
FIPS Province_State Lat Long_ 1/22/20 1/23/20 1/24/20 1/25/20 1/26/20 1/27/20 ... 5/19/20 5/20/20 5/21/20 5/22/20 5/23/20 5/24/20 5/25/20 5/26/20 5/27/20 5/28/20
0 60.0 American Samoa -14.271000 -170.132000 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 66.0 Guam 13.444300 144.793700 0 0 0 0 0 0 ... 5 5 5 5 5 5 5 5 5 5
2 69.0 Northern Mariana Islands 15.097900 145.673900 0 0 0 0 0 0 ... 2 2 2 2 2 2 2 2 2 2
3 72.0 Puerto Rico 18.220800 -66.590100 0 0 0 0 0 0 ... 124 125 126 126 127 127 129 129 129 131
4 78.0 Virgin Islands 18.335800 -64.896300 0 0 0 0 0 0 ... 6 6 6 6 6 6 6 6 6 6
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
3256 NaN Utah 39.372319 -111.575868 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
3257 NaN Utah 38.996171 -110.701396 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
3258 NaN Utah 37.854472 -111.441876 0 0 0 0 0 0 ... 4 4 4 4 4 4 4 4 4 4
3259 NaN Utah 40.124915 -109.517442 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
3260 NaN Utah 41.271160 -111.914512 0 0 0 0 0 0 ... 3 3 3 4 4 4 4 4 6 5

3261 rows × 132 columns

In [71]:
deaths_us.drop(['UID','iso2','iso3','Admin2','Country_Region','code3','Combined_Key','Population'], axis = 1, inplace = True)
In [72]:
# declaring function for converting Date formats
def convert_date_us(data):
    try:
        data.columns = list(data.columns[:4]) + [datetime.strptime(dt, "%m/%d/%y").date().strftime("%Y-%m-%d") for dt in data.columns[4:]]
    except:
        data.columns = list(data.columns[:4]) + [datetime.strptime(dt, "%m/%d/%Y").date().strftime("%Y-%m-%d") for dt in data.columns[4:]]
In [73]:
convert_date_us(deaths_us)
convert_date_us(confirmed_us)
In [74]:
confirmed_us
Out[74]:
FIPS Province_State Lat Long_ 2020-01-22 2020-01-23 2020-01-24 2020-01-25 2020-01-26 2020-01-27 ... 2020-05-19 2020-05-20 2020-05-21 2020-05-22 2020-05-23 2020-05-24 2020-05-25 2020-05-26 2020-05-27 2020-05-28
0 60.0 American Samoa -14.271000 -170.132000 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 66.0 Guam 13.444300 144.793700 0 0 0 0 0 0 ... 5 5 5 5 5 5 5 5 5 5
2 69.0 Northern Mariana Islands 15.097900 145.673900 0 0 0 0 0 0 ... 2 2 2 2 2 2 2 2 2 2
3 72.0 Puerto Rico 18.220800 -66.590100 0 0 0 0 0 0 ... 124 125 126 126 127 127 129 129 129 131
4 78.0 Virgin Islands 18.335800 -64.896300 0 0 0 0 0 0 ... 6 6 6 6 6 6 6 6 6 6
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
3256 NaN Utah 39.372319 -111.575868 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
3257 NaN Utah 38.996171 -110.701396 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
3258 NaN Utah 37.854472 -111.441876 0 0 0 0 0 0 ... 4 4 4 4 4 4 4 4 4 4
3259 NaN Utah 40.124915 -109.517442 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
3260 NaN Utah 41.271160 -111.914512 0 0 0 0 0 0 ... 3 3 3 4 4 4 4 4 6 5

3261 rows × 132 columns

In [75]:
from urllib.request import urlopen
import json
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    counties = json.load(response)
In [76]:
confirmed_us_df = confirmed_us.melt(id_vars = ['Province_State','FIPS','Lat','Long_'],
                                            value_vars = confirmed_us.columns[4:],
                                            var_name = 'Date',
                                            value_name = 'Confirmed_Cases')
In [77]:
deaths_us_df = deaths_us.melt(id_vars = ['Province_State','FIPS','Lat','Long_'],
                                            value_vars = deaths_us.columns[4:],
                                            var_name = 'Date',
                                            value_name = 'Confirmed_Cases')
In [78]:
confirmed_us_df = confirmed_us_df.groupby(['Province_State','FIPS','Date'])['Confirmed_Cases'].sum().reset_index()
In [79]:
confirmed_us_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 416128 entries, 0 to 416127
Data columns (total 4 columns):
 #   Column           Non-Null Count   Dtype  
---  ------           --------------   -----  
 0   Province_State   416128 non-null  object 
 1   FIPS             416128 non-null  float64
 2   Date             416128 non-null  object 
 3   Confirmed_Cases  416128 non-null  int64  
dtypes: float64(1), int64(1), object(2)
memory usage: 12.7+ MB
In [80]:
confirmed_us_df['FIPS'] = confirmed_us_df['FIPS'].astype(int)
In [81]:
x = confirmed_us_df.groupby(['Province_State','FIPS'])['Confirmed_Cases'].sum().reset_index()
x
Out[81]:
Province_State FIPS Confirmed_Cases
0 Alabama 1001 139
1 Alabama 1003 254
2 Alabama 1005 30
3 Alabama 1007 21
4 Alabama 1009 12
... ... ... ...
3246 Wyoming 56041 0
3247 Wyoming 56043 0
3248 Wyoming 56045 0
3249 Wyoming 80056 0
3250 Wyoming 90056 284

3251 rows × 3 columns

In [82]:
fig = px.choropleth(x,
                    geojson=counties, 
                    locations='FIPS', color='Confirmed_Cases',
                    color_continuous_scale="Viridis",
                    range_color=(confirmed_us_df['Confirmed_Cases'].min(), confirmed_us_df['Confirmed_Cases'].max()),
                    hover_name = 'Province_State',
                   scope = 'usa')

fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()